# Final Project Code

library(tidyverse)
library(ggplot2)
library(dplyr)
library(leaflet)
library(usmap)
library(sf)
library(readr)
library(scales)
library(tidyr)
library(lubridate)
library(pheatmap)

# Read in the raw data for Warnings and Citations, parsing the incident
# date and skipping personally identifying / unused columns at read time.
warnings <- read_csv(
  "2023_warning_data.csv",
  col_types = cols(
    Warnings_Date = col_date(format = "%m/%d/%Y"),
    WEB_ADDRESS = col_skip(),
    PHONE_NUMBER = col_skip(),
    NAME = col_skip()
  )
)

citations <- read_csv(
  "2023_citation_data.csv",
  col_types = cols(
    Date = col_date(format = "%m/%d/%Y"),
    WEB_ADDRESS = col_skip(),
    PHONE_NUMBER = col_skip(),
    NAME = col_skip()
  )
)

# Harmonize column names across the two data sets so they can be merged:
# both use ViolationDate for the incident date, and warnings' Gender is
# renamed Sex to match citations. (The original code made three separate
# rename() passes; one per data set suffices.)
citations <- citations %>%
  rename(ViolationDate = Date)

warnings <- warnings %>%
  rename(Sex = Gender, ViolationDate = Warnings_Date)

# Standardize the citations data for merging with warnings.
# Assumption: ID is the officer's ID.
# Steps: label the outcome, decode Sex codes into a readable Gender,
# split the violation date into Year/Month/Day parts, parse the HHMM
# time string, and keep only the columns shared by both data sets.
citations_processed <- citations %>%
  mutate(
    outcome = "Citation",
    data_type = "Citation",
    Gender = case_when(
      Sex == "M" ~ "Male",
      Sex == "F" ~ "Female",
      TRUE ~ "Other/Unknown"
    ),
    Year = year(ViolationDate),
    Month = month(ViolationDate),
    DayOfMonth = day(ViolationDate),
    Time = parse_date_time(Time, "HM")
  ) %>%
  select(
    outcome, Gender, Year, Month, DayOfMonth, ViolationDate, Time,
    Offense_Description = Charge, District = DISTRICT, Race, Ethnicity,
    Latitude, Longitude, OfficerID = ID, data_type
  )

# Standardize the warnings data the same way as citations, renaming the
# warning-specific column names (Lat/Long/Officer_ID) to the shared schema
# so bind_rows() lines the two data sets up column-for-column.
warnings_processed <- warnings %>%
  mutate(
    outcome = "Warning",
    data_type = "Warning",
    Gender = case_when(
      Sex == "M" ~ "Male",
      Sex == "F" ~ "Female",
      TRUE ~ "Other/Unknown"
    ),
    Year = year(ViolationDate),
    Month = month(ViolationDate),
    DayOfMonth = day(ViolationDate),
    Time = parse_date_time(Time, "HM")
  ) %>%
  select(
    outcome, Gender, Year, Month, DayOfMonth, ViolationDate, Time,
    Offense_Description, District = DISTRICT, Race, Ethnicity,
    Latitude = Lat, Longitude = Long, OfficerID = Officer_ID, data_type
  )

# Combine citations and warnings into one data set.
combined_wc <- bind_rows(citations_processed, warnings_processed)

# Binary outcome for modeling: 0 = Citation, 1 = Warning (got out of ticket).
combined_wc <- combined_wc %>%
  mutate(BinaryOutcome = ifelse(outcome == "Warning", 1, 0))

## Normalize district names to Title Case.
combined_wc$District <- tools::toTitleCase(tolower(combined_wc$District))

## Examining Unverified data
## After examination, unverified only makes up 0.0143 or 1.43% of the data
## set, so we remove it because it is a very small portion of the total.
# combined_wc %>%
#  count(District) %>%
#  mutate(Proportion = n / sum(n)) %>%
#  arrange(desc(n))

## Drop Unverified and missing districts in one filter pass (the original
## used two back-to-back pipelines; a single filter() with both conditions
## is equivalent and clearer).
combined_wc <- combined_wc %>%
  filter(!is.na(District), District != "Unverified")

## Keep only Male/Female records for the gender analyses below.
combined_wc_mf <- combined_wc %>%
  filter(Gender != "Other/Unknown")


## Stacked bar chart: raw counts of each outcome, split by gender.
ggplot(combined_wc_mf, aes(x = outcome, fill = Gender)) +
  geom_bar() +
  scale_fill_manual(
    values = c("Female" = "lightpink2", "Male" = "lightsteelblue")
  ) +
  labs(
    title = "Count of Outcomes by Gender",
    x = "Outcome Type",
    y = "# of Incident",
    fill = "Gender"
  ) +
  theme_gray() +
  theme(plot.title = element_text(hjust = 0.5))

## Gender chart: proportion of stops resulting in a Warning vs. Citation.
## Warning rate = mean of the 0/1 BinaryOutcome within each gender.
gender_warning_rate <- combined_wc_mf %>%
  group_by(Gender) %>%
  summarise(
    Total_Incidents = n(),
    Warning_Rate = mean(BinaryOutcome)
  ) %>%
  ungroup()

# Headroom above the tallest bar so the percentage labels fit.
y_ceiling <- max(gender_warning_rate$Warning_Rate) * 1.1

gender_chart <- ggplot(
  gender_warning_rate,
  aes(x = Gender, y = Warning_Rate, fill = Gender)
) +
  geom_col(show.legend = FALSE) +
  geom_text(
    aes(label = scales::percent(Warning_Rate, accuracy = 0.1)),
    vjust = -0.5,
    size = 5
  ) +
  scale_y_continuous(labels = scales::percent, limits = c(0, y_ceiling)) +
  scale_fill_manual(
    values = c("Female" = "lightpink2", "Male" = "lightsteelblue")
  ) +
  labs(
    title = "Warning Rate by Gender",
    subtitle = "Proportion of stops resulting in a Warning (vs Citation)",
    x = "Gender",
    y = "Warning Rate"
  ) +
  theme_gray() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

gender_chart

## Chi-squared test of independence: Gender vs. outcome,
## starting from the 2x2 contingency table.
contingency_tbl <- combined_wc_mf %>%
  filter(Gender %in% c("Male", "Female")) %>%
  select(Gender, BinaryOutcome) %>%
  table()

chi_sq_results <- chisq.test(contingency_tbl)

# Components of the test statistic. (pheatmap is already attached at the
# top of the file, so the duplicate library(pheatmap) call was dropped.)
observed_counts <- chi_sq_results$observed
expected_counts <- chi_sq_results$expected
pearson_residuals <- chi_sq_results$residuals

# Each cell's contribution to the chi-square statistic, expressed as a
# percentage of the total so the heatmap shows where the association lives.
contributions <- (observed_counts - expected_counts)^2 / expected_counts
total_chi_square <- chi_sq_results$statistic
percentage_contributions <- 100 * contributions / total_chi_square
colnames(percentage_contributions) <- c("Citation", "Warning")

# Heatmap of percentage contributions; clustering disabled to keep the
# natural Gender x Outcome layout.
pheatmap(
  percentage_contributions,
  display_numbers = TRUE,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  main = "Percentage Contribution to Chi-Square Statistic",
  fontsize = 11,
  fontsize_row = 11,
  fontsize_col = 11,
  fontsize_number = 15
)

library(dplyr)
library(lubridate)
library(ggplot2)
library(scales)
library(viridis)
## Console output captured when this script was run (not code):
# Warning: package 'viridis' was built under R version 4.5.2
# Loading required package: viridisLite
#
# Attaching package: 'viridis'
# The following object is masked from 'package:scales':
#
#     viridis_pal
library(tidyr)

# Derive hour/minute and a coarse time-of-day bucket for each stop.
combined_wc_mf <- combined_wc_mf %>%
  mutate(
    Hour = hour(Time),
    Minute = minute(Time),
    Time_of_day = case_when(
      Hour >= 5 & Hour < 10 ~ "Morning Rush (0500-1000)",
      Hour >= 10 & Hour < 15 ~ "Daytime (1000-1500)",
      Hour >= 15 & Hour < 19 ~ "Evening Rush (1500-1900)",
      TRUE ~ "Night/Late Night (1900-0500)"
    )
  )

# Number of stops on each day of the month
# (count() is group_by + summarise(n()) + ungroup in one call).
stop_count_by_day <- combined_wc_mf %>%
  count(DayOfMonth, name = "Stop_Count")

# Make interactive heatmap using plotly
library(plotly)

## Console output captured when this script was run (not code):
#
# Attaching package: 'plotly'
# The following object is masked from 'package:ggplot2':
#
#     last_plot
# The following object is masked from 'package:stats':
#
#     filter
# The following object is masked from 'package:graphics':
#
#     layout
# Citation rate per (day of month, hour) cell for the heatmap.
# .groups = "drop" returns an ungrouped result directly and silences the
# "`summarise()` has grouped output" message that was pasted into the
# original script as stray text.
heatmap_days <- combined_wc_mf %>%
  group_by(DayOfMonth, Hour) %>%
  summarise(
    TotalOutcomes = n(),
    TotalCitations = sum(BinaryOutcome == 0, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    Citation_Rate = TotalCitations / TotalOutcomes
  ) %>%
  tidyr::complete(
    DayOfMonth, Hour,
    fill = list(TotalOutcomes = 0, Citation_Rate = 0)
  )

# Label for x-axis (every hour 0-23).
all_hours <- as.character(0:23)

# Convert to factor so the discrete axes show every day/hour level.
heatmap_days$Hour <- factor(heatmap_days$Hour)
heatmap_days$DayOfMonth <- factor(heatmap_days$DayOfMonth)

# Hover text for the interactive (plotly) version of the heatmap.
heatmap_days <- heatmap_days %>%
  mutate(text = paste0(
    "Hour: ", Hour, "\n",
    "Day: ", DayOfMonth, "\n",
    "Citation Rate: ", round(Citation_Rate, 2)
  ))

## Heatmap of citation rate by day of month and hour of day, with every
## axis level shown (drop = FALSE keeps empty factor levels visible).
## The original script defined this identical plot object twice; it is
## built once here and then rendered both interactively and statically.
plot_heatmap_discrete_axes <- ggplot(
  heatmap_days,
  aes(x = Hour, y = DayOfMonth, fill = Citation_Rate, text = text)
) +
  geom_tile(aes(width = 1, height = 1), color = "white", linewidth = 0.1) +
  scale_fill_viridis(
    option = "magma",
    direction = -1,
    name = "Citation Rate",
    labels = scales::percent
  ) +
  scale_x_discrete(name = "Hour of Day (0 = Midnight, 12 = Noon)", drop = FALSE) +
  scale_y_discrete(drop = FALSE) +
  labs(
    title = "Citation Rate by Day of Month and Hour of Day",
    subtitle = "Higher intensity (darker color) indicates a higher proportion of Citations",
    y = "Day of the Month"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(vjust = 0.1, hjust = 0.1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

# Interactive version with hover text, then the static ggplot version.
ggplotly(plot_heatmap_discrete_axes, tooltip = "text")

plot_heatmap_discrete_axes

# Read the 2023 arrest data, parsing the arrest date and skipping
# identifying / unused columns at read time.
Arrest2023 <- read_csv(
  "2023_arrest_data.csv",
  col_types = cols(
    ArresteeNumber = col_skip(),
    ArrestDate = col_date(format = "%m/%d/%Y"),
    ArrestTime = col_character(),
    WEB_ADDRESS = col_skip(),
    PHONE_NUMBER = col_skip(),
    NAME = col_skip()
  )
)

## Bounding box used to drop coordinate outliers.
min_lat <- 38.6
max_lat <- 39.0
min_lon <- -77.6
max_lon <- -77.0

## Keep only arrests whose coordinates fall inside the bounding box
## (between() is inclusive on both ends, matching >= / <=).
arrest_data_clean <- Arrest2023 %>%
  filter(
    between(Latitude, min_lat, max_lat),
    between(Longitude, min_lon, max_lon)
  )

## District boundaries from the Fairfax County Supervisor Districts
## shapefile. (The st_read() console output that had been pasted into the
## script here was removed — it is not code.)
district_boundaries <- st_read("Supervisor_Districts/Supervisor_Districts.shp")

## Reproject to WGS84 (EPSG:4326) to match the leaflet basemap.
## st_crs(...)$epsg can be NA for a CRS with no EPSG code, and `NA != 4326`
## would make the bare if() condition error, so use isTRUE(): transform
## unless we can positively confirm the data is already in 4326.
if (!isTRUE(st_crs(district_boundaries)$epsg == 4326)) {
  district_boundaries <- st_transform(district_boundaries, 4326)
}

## Two-color palette for stop outcomes (red = Citation, gold = Warning).
pal <- colorFactor(
  palette = c("red", "gold1"),
  domain = combined_wc_mf$outcome,
  levels = c("Citation", "Warning")
)

## Interactive leaflet map: supervisor-district polygons overlaid with one
## circle marker per stop, colored by outcome via the `pal` palette above.
leaflet() %>%
  addProviderTiles(providers$OpenStreetMap) %>%
  # District boundaries: black outlines with semi-transparent fill and a
  # clickable popup naming the district.
  addPolygons(
    data = district_boundaries,
    fillOpacity = 0.5,
    color = "black",
    weight = 1,
    popup = ~paste("District:", DISTRICT)
  ) %>%
  # Individual stops: small translucent dots (no stroke) so dense areas
  # read as darker clusters rather than solid blobs.
  addCircleMarkers(
    data = combined_wc_mf,
    lng = ~Longitude,
    lat = ~Latitude,
    radius = 2.5,
    color = ~pal(outcome),
    stroke = FALSE,
    fillOpacity = 0.4,
    popup = ~paste(
      "Outcome: ", outcome, "<br>")
  )
library(dplyr)
library(lubridate)
library(caTools)
# Warning: package 'caTools' was built under R version 4.5.2
library(pROC) 
## Console output captured when this script was run (not code):
# Warning: package 'pROC' was built under R version 4.5.2
# Type 'citation("pROC")' for a citation.
#
# Attaching package: 'pROC'
# The following objects are masked from 'package:stats':
#
#     cov, smooth, var
# Add the modeling features in a single mutate() (the original used two
# back-to-back mutate pipelines on the same data frame).
# NOTE(review): HourOfDay duplicates the Hour column computed earlier; it
# is kept because the model formula below refers to it by this name.
# NOTE(review): wday(label = TRUE) labels are locale-dependent; in a
# non-English locale these levels would not match "Mon".."Sun" — confirm
# the script always runs in an English locale.
combined_wc_mf <- combined_wc_mf %>%
  mutate(
    HourOfDay = hour(Time),
    DayOfWeek = factor(
      wday(ViolationDate, label = TRUE, week_start = 1),
      levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    )
  )

# Response plus the chronological / location predictors for the model.
model_columns_chrono <- c(
  "BinaryOutcome",
  "HourOfDay",
  "DayOfMonth",
  "DayOfWeek",
  "District"
)

# All predictors treated as categorical; the response becomes a 0/1
# factor; rows with any missing value are dropped.
data_for_modeling <- combined_wc_mf %>%
  select(all_of(model_columns_chrono)) %>%
  mutate(across(c(HourOfDay, DayOfMonth, DayOfWeek, District), as.factor)) %>%
  mutate(BinaryOutcome = factor(BinaryOutcome, levels = c(0, 1))) %>%
  na.omit()

data_for_modeling
## Console output captured when this script was run (not code):
# # A tibble: 88,320 x 5
#    BinaryOutcome HourOfDay DayOfMonth DayOfWeek District
#    <fct>         <fct>     <fct>      <ord>     <fct>
#  1 0             16        12         Wed       Dranesville
#  2 0             15        13         Mon       Providence
#  3 0             15        13         Mon       Providence
#  4 0             15        13         Mon       Providence
#  5 0             5         9          Thu       Sully
#  6 0             0         10         Mon       Franconia
#  7 0             1         10         Mon       Franconia
#  8 0             13        11         Tue       Dranesville
#  9 0             11        31         Tue       Hunter Mill
# 10 0             11        10         Mon       Hunter Mill
# # i 88,310 more rows
# Reproducible 70/30 train/test split, stratified on the outcome.
set.seed(42)

sample_split_chrono <- sample.split(
  Y = data_for_modeling$BinaryOutcome,
  SplitRatio = 0.70
)

train_set_chrono <- subset(data_for_modeling, sample_split_chrono == TRUE)
test_set_chrono <- subset(data_for_modeling, sample_split_chrono == FALSE)

# Logistic regression: P(Warning) as a function of hour, day of month,
# day of week, and district (all categorical).
logistic_model_chrono <- glm(
  formula = BinaryOutcome ~ .,
  data = train_set_chrono,
  family = binomial(link = "logit")
)

# Exponentiated coefficients are odds ratios relative to each factor's
# reference level; OR < 1 means the level leans toward Citation.
odds_ratio_chrono <- exp(coef(logistic_model_chrono))

odds_ratio_df_chrono <- data.frame(
  Variable = names(odds_ratio_chrono),
  OddsRatio = odds_ratio_chrono
)

# The 30 levels most strongly associated with a Citation (smallest OR).
odds_ratio_df_chrono %>%
  filter(Variable != "(Intercept)") %>%
  arrange(OddsRatio) %>%
  head(30) %>%
  print()
## Console output captured when this script was run (not code):
#                                Variable OddsRatio
# HourOfDay6                   HourOfDay6 0.3700923
# HourOfDay5                   HourOfDay5 0.5129970
# HourOfDay14                 HourOfDay14 0.6542359
# DayOfMonth25               DayOfMonth25 0.6566403
# HourOfDay13                 HourOfDay13 0.6668524
# HourOfDay11                 HourOfDay11 0.6733436
# HourOfDay16                 HourOfDay16 0.6749441
# DayOfMonth12               DayOfMonth12 0.7050878
# DayOfMonth29               DayOfMonth29 0.7064639
# HourOfDay12                 HourOfDay12 0.7121153
# HourOfDay17                 HourOfDay17 0.7191464
# DayOfMonth11               DayOfMonth11 0.7217906
# DayOfMonth26               DayOfMonth26 0.7264822
# HourOfDay10                 HourOfDay10 0.7278707
# DayOfMonth9                 DayOfMonth9 0.7309660
# DayOfMonth3                 DayOfMonth3 0.7333756
# DayOfMonth4                 DayOfMonth4 0.7421692
# DayOfMonth5                 DayOfMonth5 0.7425681
# HourOfDay15                 HourOfDay15 0.7473944
# DayOfMonth30               DayOfMonth30 0.7492766
# DayOfMonth18               DayOfMonth18 0.7585408
# HourOfDay18                 HourOfDay18 0.7700971
# DayOfMonth20               DayOfMonth20 0.7708255
# DayOfMonth17               DayOfMonth17 0.7771909
# HourOfDay9                   HourOfDay9 0.7811855
# DayOfMonth27               DayOfMonth27 0.7826999
# DayOfMonth21               DayOfMonth21 0.7903996
# DistrictSpringfield DistrictSpringfield 0.7983477
# DayOfMonth8                 DayOfMonth8 0.8061866
# HourOfDay4                   HourOfDay4 0.8063216
## Visual: the five strongest effects in each direction (smallest and
## largest odds ratios, excluding the intercept).
top_citation_chrono <- odds_ratio_df_chrono %>%
  filter(Variable != "(Intercept)") %>%
  arrange(OddsRatio) %>%
  slice_head(n = 5) %>%
  mutate(Effect = "Stronger Citation Likelihood")

top_warning_chrono <- odds_ratio_df_chrono %>%
  filter(Variable != "(Intercept)") %>%
  arrange(desc(OddsRatio)) %>%
  slice_head(n = 5) %>%
  mutate(Effect = "Stronger Warning Likelihood")

# Shorten the dummy-variable names for readable axis labels.
plot_data_chrono <- bind_rows(top_citation_chrono, top_warning_chrono) %>%
  mutate(
    Variable = gsub("HourOfDay", "Hr:", Variable),
    Variable = gsub("DayOfMonth", "Day:", Variable),
    Variable = gsub("DayOfWeek", "Week:", Variable),
    Variable = gsub("District", "Dist:", Variable)
  )

# Horizontal bars on a log2 scale, with OR = 1 (no effect) as a dashed
# reference line. geom_col() is geom_bar(stat = "identity").
odds_ratio_plot_chrono <- ggplot(
  plot_data_chrono,
  aes(x = Variable, y = OddsRatio, fill = Effect)
) +
  geom_col(position = "identity") +
  geom_hline(yintercept = 1.0, linetype = "dashed", color = "black", linewidth = 1) +
  scale_y_continuous(
    trans = "log2",
    breaks = c(0.3, 0.5, 0.7, 1.0, 1.5, 2.0, 2.5),
    labels = c("0.3", "0.5", "0.7", "1.0", "1.5", "2.0", "2.5")
  ) +
  scale_fill_manual(
    values = c(
      "Stronger Citation Likelihood" = "coral3",
      "Stronger Warning Likelihood" = "gold1"
    )
  ) +
  labs(
    title = "Odds Ratios of Date and Location Factors on Issuing a Warning",
    subtitle = "Based on Hour, Day of Month, Day of Week, and District",
    x = "Predictor Variable",
    y = "Odds Ratio (Warning vs. Citation)",
    fill = "Enforcement Discretion"
  ) +
  # Flip coordinates for a horizontal bar chart (easier to read labels).
  coord_flip() +
  theme_grey(base_size = 11)

odds_ratio_plot_chrono

## Model evaluation on the held-out test set: ROC curve and AUC.
library(pROC)  # already attached above; kept so this section runs standalone

# Predicted probability of a Warning for each test-set row.
probability_chrono <- predict(
  logistic_model_chrono,
  newdata = test_set_chrono,
  type = "response"
)

# direction = "<" (controls < cases) states explicitly what roc() had been
# auto-detecting — the "Setting direction" message that was pasted into the
# original script confirms this choice — so the call is silent and cannot
# silently flip on different data.
roc_obj_chrono <- roc(
  response = test_set_chrono$BinaryOutcome,
  predictor = probability_chrono,
  levels = c(0, 1),
  direction = "<"
)

auc_value_chrono <- auc(roc_obj_chrono)

round(auc_value_chrono, 4)
# [1] 0.6289   (console output: AUC from the run above, not code)
# Combined crime data (arrests + citations + warnings) by district.
combined_data <- read.csv("combined_crime_data.csv")

library(ggplot2)

# Stacked bars: enforcement outcome counts within each district.
ggplot(combined_data, aes(x = Outcome, fill = District)) +
  geom_bar(position = "stack") + # change to "fill" for proportions
  coord_flip() +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    title = "Enforcement Outcomes by District",
    x = "Outcome",
    y = "Count",
    fill = "District"
  )

# Raw counts of each outcome per district.
table(combined_data$District, combined_data$Outcome)
              
               Arrest Citation Warning
  BRADDOCK       1221     7647    2645
  DRANESVILLE     881     5055    2088
  FRANCONIA      6011     6534    3499
  HUNTER MILL    2001     6244    2474
  MASON          5833     5490    2678
  MOUNT VERNON   3040     3662    2451
  PROVIDENCE     9315     4772    1941
  SPRINGFIELD    1176     9909    2672
  SULLY          2300    14854    3758
  Unverified      356     1127     154
# Chi-squared test of independence: is the distribution of enforcement
# outcomes (Arrest/Citation/Warning) the same across districts?
chisq.test(table(combined_data$District, combined_data$Outcome))

    Pearson's Chi-squared test

data:  table(combined_data$District, combined_data$Outcome)
X-squared = 20692, df = 18, p-value < 2.2e-16
library(dplyr)
library(tidyr)

# Reshape demographics to long form: one row per (stop, demographic
# variable), so all three variables can share one faceted plot.
demo_long <- combined_data %>%
  pivot_longer(
    cols = c(Gender, Race, Ethnicity),
    names_to = "variable",
    values_to = "group"
  )

# Faceted stacked bars: outcomes broken down by each demographic variable.
ggplot(demo_long, aes(x = Outcome, fill = group)) +
  geom_bar(position = "stack") +
  facet_wrap(~ variable, scales = "free_x") +
  coord_flip() +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(
    title = "Enforcement Outcomes by Demographics",
    x = "Outcome",
    y = "Count",
    fill = "Demographic Group"
  )

library(nnet)

# Convert the response and all categorical predictors to factors in one
# pass (equivalent to the five individual factor() assignments).
combined_data <- combined_data %>%
  mutate(across(c(Outcome, Race, Gender, District, Ethnicity), factor))

# Multinomial logit: outcome (Arrest/Citation/Warning) as a function of
# demographics and district; the first Outcome level is the baseline.
model <- multinom(Outcome ~ Race + Gender + District + Ethnicity, data = combined_data)
## Console output captured when this script was run (not code):
# # weights:  69 (44 variable)
# initial  value 133797.793412
# iter  10 value 110273.346261
# iter  20 value 109828.994978
# iter  30 value 108047.740898
# iter  40 value 106985.637011
# iter  50 value 106983.922418
# final  value 106983.911647
# converged
# Call:
# multinom(formula = Outcome ~ Race + Gender + District + Ethnicity,
#     data = combined_data)
#
# Coefficients:
#          (Intercept)     RaceB    RaceI    RaceU      RaceW    GenderM
# Citation    6.446043 -1.270532 1.902943 2.020289 -0.1401901 -0.5121848
# Warning    -2.979146 -1.162020 1.981047 1.571784 -0.0923412 -0.6626383
#            GenderO  GenderU  GenderX DistrictDRANESVILLE DistrictFRANCONIA
# Citation  6.010004 7.558657 9.927126          -0.0666923         -1.475278
# Warning  -4.276199 9.096641 8.356495           0.1233499         -1.022930
#          DistrictHUNTER MILL DistrictMASON DistrictMOUNT VERNON
# Citation          -0.6712792     -1.629807           -1.3727811
# Warning           -0.5462693     -1.210128           -0.7197438
#          DistrictPROVIDENCE DistrictSPRINGFIELD DistrictSULLY
# Citation          -2.389145           0.3690253     0.1267567
# Warning           -2.237610           0.1084139    -0.1961090
#          DistrictUnverified EthnicityB EthnicityH EthnicityN EthnicityU
# Citation         -0.7239367   2.511770  -4.582736  -3.813774  -2.729531
# Warning          -1.6738146  -2.361734   3.525458   4.685802   5.843154
#
# Std. Errors:
#          (Intercept)      RaceB     RaceI     RaceU      RaceW    GenderM
# Citation  0.03690439 0.03664586 0.2797341 0.1046626 0.03631312 0.01796448
# Warning   0.04097933 0.04157964 0.2865267 0.1097935 0.04063792 0.02044004
#               GenderO   GenderU   GenderX DistrictDRANESVILLE DistrictFRANCONIA
# Citation 1.384588e-08 0.1736266 0.5231826          0.04893589        0.03673167
# Warning  1.161816e-09 0.1736270 0.5231557          0.05426427        0.04186914
#          DistrictHUNTER MILL DistrictMASON DistrictMOUNT VERNON
# Citation          0.04137716    0.03725741           0.04078278
# Warning           0.04710735    0.04304069           0.04551201
#          DistrictPROVIDENCE DistrictSPRINGFIELD DistrictSULLY
# Citation         0.03681733          0.04456369    0.03906339
# Warning          0.04385473          0.05022441    0.04459307
#          DistrictUnverified   EthnicityB EthnicityH EthnicityN EthnicityU
# Citation         0.07127717 9.699283e-08 0.02127470 0.01745390 0.03459567
# Warning          0.10481264 9.677125e-08 0.02480225 0.01935154 0.03690519
#
# Residual Deviance: 213967.8
# AIC: 214055.8